
###### Exercícios Aula 8 - Augusto Bitencourt ######

##### Uma estimativa da incerteza na previsão do modelo #####

# Read the whitespace-separated table "babies.txt"; its first row holds the
# column names.
babies <- read.csv(file = "babies.txt", header = TRUE, sep = "")
head(babies)

# Each column uses its own code for "unknown". The codes are compared as
# strings, so R coerces the column to character for the comparison only.
codigos.ausentes <- c(
  bwt = "999", gestation = "999", parity = "9", age = "99",
  height = "99", weight = "999", smoke = "9"
)
for (coluna in names(codigos.ausentes)) {
  e.codigo <- babies[[coluna]] == codigos.ausentes[[coluna]]
  babies[[coluna]][e.codigo] <- NA
}
head(babies)

# Drop 13 rows by position.
# NOTE(review): the file does not say why these rows are excluded -- presumably
# they hold unusable records; confirm before reusing with another data file.
linhas.excluidas <- c(4, 90, 94, 99, 155, 243, 651, 707, 740, 880, 964, 972, 1193)
babies <- babies[-linhas.excluidas, ]
babies

# Confidence band for the fitted line bwt ~ gestation.
#
# The standard error of the fitted mean at a point x0 is
#   se(x0) = sqrt(S2 * (1 / n + (x0 - mean(x))^2 / SSX))
# where S2 is the residual variance of the model and n, mean(x) and SSX come
# from the observations used in the fit (not from the prediction grid).

# Fit first so that every later quantity is derived from the model itself.
mod.1 <- lm(bwt ~ gestation, data = babies)

# Grid of 150 "x" values spanning the observed gestation range.
dados.gestacao.v <- seq(
  from = min(babies$gestation, na.rm = TRUE),
  to = max(babies$gestation, na.rm = TRUE),
  length.out = 150
)
dados.gestacao.v # "x" values

# Predictor values actually used by lm(); rows with NA were dropped by the fit.
gestacao.obs <- model.frame(mod.1)[["gestation"]]
media.x <- mean(gestacao.obs)
media.x

coef(mod.1)

# Predicted "y" values, taken from the fitted coefficients rather than from
# hand-copied (rounded) numbers.
coefs.1 <- coef(mod.1)
estimativa.bwt <- coefs.1[["(Intercept)"]] +
  coefs.1[["gestation"]] * dados.gestacao.v
estimativa.bwt

SSX <- sum((gestacao.obs - media.x)^2)
# Residual variance: residual sum of squares over residual degrees of freedom.
S2 <- sum(residuals(mod.1)^2) / df.residual(mod.1)
n <- nobs(mod.1)

se.y <- sqrt(S2 * ((1 / n) + ((dados.gestacao.v - media.x)^2 / SSX)))
se.y

# Two-sided 95% critical value of Student's t with the residual df (n - 2).
valor.t <- qt(p = 0.975, df = df.residual(mod.1))
valor.t

# Half-width of the confidence interval at each grid point; the same band is
# given by predict(mod.1, newdata, interval = "confidence").
int.conf <- se.y * valor.t
int.conf

plot(bwt ~ gestation, data = babies)
abline(mod.1, col = "purple", lwd = 2)
lines(dados.gestacao.v, estimativa.bwt + int.conf, col = "red", lwd = 2)
lines(dados.gestacao.v, estimativa.bwt - int.conf, col = "green", lwd = 2)




##### Galileu estava Certo? #####

# Data for the Galileo exercise: seven paired measurements.
# NOTE(review): presumably init.h is the release height and h.d the distance
# travelled -- confirm against the exercise sheet.
init.h <- c(600, 700, 800, 950, 1100, 1300, 1500)
h.d <- c(253, 337, 395, 451, 495, 534, 573)

# Nested polynomial fits of degree 1, 2 and 3 (raw powers via I()).
mod1 <- lm(h.d ~ init.h)
mod2 <- lm(h.d ~ init.h + I(init.h^2))
mod3 <- lm(h.d ~ init.h + I(init.h^2) + I(init.h^3))

# Does the quadratic term improve on the straight line?
anova(mod1, mod2)

cf.m2 <- coef(mod2)
cf.m3 <- coef(mod3)

summary(mod2)

# Scatter plot with the three fitted curves overlaid.
plot(h.d ~ init.h)
abline(mod1)
quadratica <- function(x) {
  cf.m2[1] + cf.m2[2] * x + cf.m2[3] * x^2
}
cubica <- function(x) {
  cf.m3[1] + cf.m3[2] * x + cf.m3[3] * x^2 + cf.m3[4] * x^3
}
curve(quadratica, add = TRUE, lty = 1, col = "blue")
curve(cubica, add = TRUE, lty = 1, col = "red")

# Does the cubic term improve on the quadratic?
anova(mod2, mod3)

# Sim, um polinômio de terceiro grau é melhor para descrever os dados do
# experimento de Galileu (redução dos resíduos em comparação com mod2,
# com "p" significativo).




##### Massa de Recém-Nascidos #####

# Reload the whitespace-separated table "babies.txt" (first row = column
# names) so this section starts from the raw file.
babies <- read.csv(file = "babies.txt", header = TRUE, sep = "")
head(babies)

# Per-column codes that stand for "unknown"; matching cells become NA. The
# codes are compared as strings, so the column is coerced to character for the
# comparison only.
codigos.ausentes <- c(
  bwt = "999", gestation = "999", parity = "9", age = "99",
  height = "99", weight = "999", smoke = "9"
)
for (coluna in names(codigos.ausentes)) {
  e.codigo <- babies[[coluna]] == codigos.ausentes[[coluna]]
  babies[[coluna]][e.codigo] <- NA
}
head(babies)

# Drop 13 rows by position.
# NOTE(review): the file does not say why these rows are excluded -- presumably
# they hold unusable records; confirm before reusing with another data file.
linhas.excluidas <- c(4, 90, 94, 99, 155, 243, 651, 707, 740, 880, 964, 972, 1193)
babies <- babies[-linhas.excluidas, ]
babies
head(babies)
tail(babies)

# Forward model building for bwt: start from the intercept-only model and add
# one predictor at a time, inspecting each fit.

# Null model (intercept only) and the first predictor.
mod.0 <- lm(bwt ~ 1, data = babies)
mod.1 <- lm(bwt ~ gestation, data = babies)
summary(mod.1)
anova(mod.0, mod.1)

# + parity, compared directly against the previous model.
mod.2 <- lm(bwt ~ gestation + parity, data = babies)
summary(mod.2)
anova(mod.1, mod.2)

# From here on each model is assessed with its own sequential ANOVA table
# (terms tested in the order they enter the formula).

# + age
mod.3 <- lm(bwt ~ gestation + parity + age, data = babies)
summary(mod.3)
anova(mod.3)

# + height
mod.4 <- lm(bwt ~ gestation + parity + age + height, data = babies)
summary(mod.4)
anova(mod.4)

# + weight
mod.5 <- lm(bwt ~ gestation + parity + age + height + weight, data = babies)
summary(mod.5)
anova(mod.5)

# + smoke
mod.6 <- lm(
  bwt ~ gestation + parity + age + height + weight + smoke,
  data = babies
)
summary(mod.6)
anova(mod.6)

# Chosen model: every predictor above except age.
mod.escolhido <- lm(
  bwt ~ gestation + parity + height + weight + smoke,
  data = babies
)
anova(mod.escolhido)

# O modelo que melhor explica a variação de bwt, com o mínimo de resíduos, é aquele
# que contém as variáveis preditoras: gestation + parity + height + weight + smoke.




